library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.2.3Warning: package ‘ggplot2’ was built under R version 4.2.3Warning: package ‘tidyr’ was built under R version 4.2.3Warning: package ‘readr’ was built under R version 4.2.3Warning: package ‘purrr’ was built under R version 4.2.3Warning: package ‘forcats’ was built under R version 4.2.3Warning: package ‘lubridate’ was built under R version 4.2.3── Attaching core tidyverse packages ──────────────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.4
✔ ggplot2   3.4.3     ✔ stringr   1.5.0
✔ lubridate 1.9.2     ✔ tibble    3.1.8
✔ purrr     1.0.2     ✔ tidyr     1.3.0── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(tidyr)
library(corrplot)
Warning: package ‘corrplot’ was built under R version 4.2.3corrplot 0.92 loaded
library(plotly)
Warning: package ‘plotly’ was built under R version 4.2.3Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
library(caTools)
Warning: package ‘caTools’ was built under R version 4.2.3
library(car)
Warning: package ‘car’ was built under R version 4.2.3Loading required package: carData
Warning: package ‘carData’ was built under R version 4.2.3
Attaching package: ‘car’

The following object is masked from ‘package:purrr’:

    some

The following object is masked from ‘package:dplyr’:

    recode
Mat = read.csv('studentMat.csv')

Preprocessing

Converting Non numeric to Numeric

non_numeric_columns <- names(Mat)[sapply(Mat, function(col) !is.numeric(col))]
cat("These are non-numeric columns:", non_numeric_columns, "\n\n")
These are non-numeric columns: school sex address famsize Pstatus Mjob Fjob reason guardian schoolsup famsup paid activities nursery higher internet romantic 
numeric_columns <- names(Mat)[sapply(Mat, is.numeric)]
cat("These are numeric columns:", numeric_columns, "\n")
These are numeric columns: age Medu Fedu traveltime studytime failures famrel freetime goout Dalc Walc health absences G1 G2 G3 

We have to convert Non numeric data to numeric data to find the correlation coeff

# Mat$sex <- ifelse(Mat$sex == 'M', 1, 0)
# non_numeric_columns <- c('school', 'address', 'famsize', 'Pstatus', 'Mjob', 'Fjob', 'reason', 'guardian', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic')

# for(col in non_numeric_columns) {
#   if(length(unique(Mat[[col]])) == 2) {
#     # Label Encoding
#     levels <- unique(Mat[[col]])
#     Mat[[col]] <- ifelse(Mat[[col]] == levels[1], 1, 0)
#   } else {
#     # One-Hot Encoding
#     Mat <- Mat %>%
#       mutate(!!col := as.factor(!!sym(col))) %>%
#       spread(!!col, !!col, fill = 0, convert = TRUE)
#   }
# }
non_numeric <- names(Mat)[sapply(Mat, function(col) !is.numeric(col))]

for (col in non_numeric){
  print(paste("The column ", col, "has ", length(unique(Mat[[col]])), ' unique values they are: '))
  print((unique(Mat[[col]])))
  cat('\n')
}
[1] "The column  school has  2  unique values they are: "
[1] "GP" "MS"

[1] "The column  sex has  2  unique values they are: "
[1] "F" "M"

[1] "The column  address has  2  unique values they are: "
[1] "U" "R"

[1] "The column  famsize has  2  unique values they are: "
[1] "GT3" "LE3"

[1] "The column  Pstatus has  2  unique values they are: "
[1] "A" "T"

[1] "The column  Mjob has  5  unique values they are: "
[1] "at_home"  "health"   "other"    "services" "teacher" 

[1] "The column  Fjob has  5  unique values they are: "
[1] "teacher"  "other"    "services" "health"   "at_home" 

[1] "The column  reason has  4  unique values they are: "
[1] "course"     "other"      "home"       "reputation"

[1] "The column  guardian has  3  unique values they are: "
[1] "mother" "father" "other" 

[1] "The column  schoolsup has  2  unique values they are: "
[1] "yes" "no" 

[1] "The column  famsup has  2  unique values they are: "
[1] "no"  "yes"

[1] "The column  paid has  2  unique values they are: "
[1] "no"  "yes"

[1] "The column  activities has  2  unique values they are: "
[1] "no"  "yes"

[1] "The column  nursery has  2  unique values they are: "
[1] "yes" "no" 

[1] "The column  higher has  2  unique values they are: "
[1] "yes" "no" 

[1] "The column  internet has  2  unique values they are: "
[1] "no"  "yes"

[1] "The column  romantic has  2  unique values they are: "
[1] "no"  "yes"
binary_columns <- c()
multi_unique_columns <- c()

# Loop through each column and categorize based on the number of unique values
for (col in non_numeric) {
    num_unique <- length(unique(Mat[[col]]))
    
    if (num_unique == 2) {
        binary_columns <- c(binary_columns, col)
    } else if (num_unique > 2) {
        multi_unique_columns <- c(multi_unique_columns, col)
    }
}
# for (col in binary_columns) {
#   unique_vals <- unique(Mat[[col]])
#   Mat[[col]] <- ifelse(Mat[[col]] == unique_vals[1], 0, 1)
# }
print(paste("Binary columns:", paste(binary_columns, collapse = ", ")))
[1] "Binary columns: school, sex, address, famsize, Pstatus, schoolsup, famsup, paid, activities, nursery, higher, internet, romantic"
cat('\n')
print(paste("Multi unique value columns:", paste(multi_unique_columns, collapse = ", ")))
[1] "Multi unique value columns: Mjob, Fjob, reason, guardian"

for (col in binary_columns) {
  if ("yes" %in% Mat[[col]] && "no" %in% Mat[[col]]) {
    # If the column has 'yes' and 'no' values
    Mat[[col]] <- ifelse(Mat[[col]] == "yes", 1, 0)
  } else {
    # For other binary columns
    unique_vals <- unique(Mat[[col]])
    Mat[[col]] <- ifelse(Mat[[col]] == unique_vals[1], 0, 1)
    print(paste(unique_vals[1],':0', unique_vals[2],':1'))
  }
}
[1] "GP :0 MS :1"
[1] "F :0 M :1"
[1] "U :0 R :1"
[1] "GT3 :0 LE3 :1"
[1] "A :0 T :1"
# Perform one-hot encoding for each column in multi_unique_columns
for (col in multi_unique_columns) {
  # Create a one-hot encoded matrix for the column
  formula_str <- paste("~ 0 +", col)
  one_hot <- model.matrix(as.formula(formula_str), data = Mat)
  
  # Convert matrix to data frame and set column names
  one_hot_df <- as.data.frame(one_hot)
  colnames(one_hot_df) <- gsub("^.\\.", col, colnames(one_hot_df))
  
  # Bind the new one-hot encoded columns to the original data frame
  Mat <- cbind(Mat, one_hot_df)
  
  # Remove the original column
  Mat[[col]] <- NULL
}

# View the first few rows of the processed data to verify
head(Mat)
NA
cor_matrix <- cor(Mat, use = "complete.obs")
corrplot(cor_matrix, method = "circle")

plot_ly(
  x = colnames(cor_matrix),
  y = rownames(cor_matrix),
  z = cor_matrix,
  type = "heatmap",
  colorscale = "Viridis"
) %>%
  layout(title_text = "Correlation Matrix Heatmap")
Warning: 'layout' objects don't have these attributes: 'title_text'
Valid attributes include:
'_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'
Warning: 'layout' objects don't have these attributes: 'title_text'
Valid attributes include:
'_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'
G3_correlation <- cor_matrix[,'G3']
library(plotly)

plot_ly(x = names(G3_correlation), y = G3_correlation, type = 'bar') %>%
  layout(title = "Correlation of G3 with Other Variables", yaxis = list(title = "Correlation Coefficient"))
NA
print(G3_correlation)
          school              sex              age          address          famsize 
     -0.04501694       0.10345565      -0.16157944      -0.10575606       0.08140711 
         Pstatus             Medu             Fedu       traveltime        studytime 
     -0.05800898       0.21714750       0.15245694      -0.11714205       0.09781969 
        failures        schoolsup           famsup             paid       activities 
     -0.36041494      -0.08278821      -0.03915715       0.10199624       0.01609970 
         nursery           higher         internet         romantic           famrel 
      0.05156790       0.18246462       0.09848337      -0.12996995       0.05136343 
        freetime            goout             Dalc             Walc           health 
      0.01130724      -0.13279147      -0.05466004      -0.05193932      -0.06133460 
        absences               G1               G2               G3      Mjobat_home 
      0.03424732       0.80146793       0.90486799       1.00000000      -0.11563430 
      Mjobhealth        Mjobother     Mjobservices      Mjobteacher      Fjobat_home 
      0.11615799      -0.09647737       0.07842887       0.05771238      -0.01338457 
      Fjobhealth        Fjobother     Fjobservices      Fjobteacher     reasoncourse 
      0.05711055      -0.05348341      -0.01610783       0.09537401      -0.09894964 
      reasonhome      reasonother reasonreputation   guardianfather   guardianmother 
     -0.02135917       0.05200772       0.09569223       0.03249322       0.02233775 
   guardianother 
     -0.08777445 
# Sorting correlations in decreasing order to get top positive correlations
top_positive <- sort(G3_correlation, decreasing = TRUE)[1:10]

# Sorting correlations in increasing order to get top negative correlations
top_negative <- sort(G3_correlation)[1:10]

print("Top Positive Correlations:")
[1] "Top Positive Correlations:"
print(top_positive)
        G3         G2         G1       Medu     higher       Fedu Mjobhealth        sex 
1.00000000 0.90486799 0.80146793 0.21714750 0.18246462 0.15245694 0.11615799 0.10345565 
      paid   internet 
0.10199624 0.09848337 
print("Top Negative Correlations:")
[1] "Top Negative Correlations:"
print(top_negative)
     failures           age         goout      romantic    traveltime   Mjobat_home 
  -0.36041494   -0.16157944   -0.13279147   -0.12996995   -0.11714205   -0.11563430 
      address  reasoncourse     Mjobother guardianother 
  -0.10575606   -0.09894964   -0.09647737   -0.08777445 
# Sort G3_correlation in increasing order
sorted_names <- names(sort(G3_correlation))

# Convert variable names to a factor with levels specified by the sorted order
factor_names <- factor(names(G3_correlation), levels = sorted_names)

# Plot using plotly
plot_ly(x = factor_names, y = G3_correlation, type = 'bar') %>%
  layout(title = "Correlation of G3 with Other Variables in Increasing Order", 
         yaxis = list(title = "Correlation Coefficient"))
NA
NA
Mat_alc <- Mat
Mat_alc$Avg_alc <- (Mat_alc$Dalc + Mat_alc$Walc)/2
Mat_alc[,c("Avg_alc", "Dalc", "Walc")]
Mat_alc <- Mat_alc %>% select(-Dalc, -Walc)
Mat_alc
Mat_alc <- Mat_alc %>% select(-G1, -G2,-G3)
corr_mat = cor(Mat_alc)
corrplot(corr_mat)

Avg_alc_correlation <- corr_mat[,'Avg_alc']


plot_ly(x = names(Avg_alc_correlation), y = Avg_alc_correlation, type = 'bar') %>%
  layout(title = "Correlation of Avg_alc with Other Variables", yaxis = list(title = "Correlation Coefficient"))

sorted_names <- names(sort(Avg_alc_correlation))


factor_names <- factor(names(Avg_alc_correlation), levels = sorted_names)

plot_ly(x = factor_names, y = Avg_alc_correlation, type = 'bar') %>%
  layout(title = "Correlation of Avg_alc with Other Variables in Increasing Order", 
         yaxis = list(title = "Correlation Coefficient"))
NA
Avg_alc_correlation
          school              sex              age          address          famsize 
    0.0935079933     0.2983306751     0.1349722737     0.1075991608     0.1126945704 
         Pstatus             Medu             Fedu       traveltime        studytime 
   -0.0098078682    -0.0216807701    -0.0071265013     0.1491336864    -0.2526978701 
        failures        schoolsup           famsup             paid       activities 
    0.1532033260    -0.0662068126    -0.0704349862     0.0672743608    -0.0541766778 
         nursery           higher         internet         romantic           famrel 
   -0.1026850313    -0.0964658432     0.0238390654     0.0002056756    -0.1084265610 
        freetime            goout           health         absences      Mjobat_home 
    0.1897535472     0.3926829382     0.0946623616     0.1386874792    -0.0091633057 
      Mjobhealth        Mjobother     Mjobservices      Mjobteacher      Fjobat_home 
   -0.0193705820    -0.0076611840     0.0100910644     0.0224310187    -0.0899757864 
      Fjobhealth        Fjobother     Fjobservices      Fjobteacher     reasoncourse 
   -0.0606312159    -0.0014299118     0.1088181083    -0.0606931394     0.0080510716 
      reasonhome      reasonother reasonreputation   guardianfather   guardianmother 
    0.0138185842     0.1296121075    -0.1072067611     0.0289511097    -0.0187935792 
   guardianother          Avg_alc 
   -0.0126818299     1.0000000000 
Mat_alc
# Assuming your data is in a variable named 'Mat_alc'



# 2. Split the data into training and testing sets
set.seed(123) # Setting seed for reproducibility
split = sample.split(Mat_alc$Avg_alc, SplitRatio = 0.8)
train_data = subset(Mat_alc, split == TRUE)
test_data = subset(Mat_alc, split == FALSE)

# 3. Train a linear regression model using the training set
model <- lm(Avg_alc ~ ., data = train_data)  # The dot means we are using all other columns as predictors

# 4. Evaluate the model using the testing set
predictions = predict(model, newdata = test_data)
Warning: prediction from a rank-deficient fit may be misleading
mse = mean((predictions - test_data$Avg_alc)^2)  # Mean Squared Error

print(mse)
[1] 0.7233548
# Additionally, you can print a summary of the model to inspect coefficients and other statistics
print(summary(model))

Call:
lm(formula = Avg_alc ~ ., data = train_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.07721 -0.52527 -0.05375  0.41627  2.98475 

Coefficients: (4 not defined because of singularities)
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       6.322e-01  1.162e+00   0.544 0.586784    
school           -1.641e-01  1.930e-01  -0.850 0.396053    
sex               3.533e-01  1.088e-01   3.248 0.001303 ** 
age               1.663e-02  5.030e-02   0.331 0.741126    
address           3.879e-01  1.299e-01   2.986 0.003076 ** 
famsize           1.463e-01  1.088e-01   1.345 0.179763    
Pstatus          -1.937e-01  1.590e-01  -1.218 0.224242    
Medu             -4.710e-02  7.168e-02  -0.657 0.511704    
Fedu              6.931e-02  6.231e-02   1.112 0.266903    
traveltime        1.332e-01  7.649e-02   1.741 0.082738 .  
studytime        -1.043e-01  6.487e-02  -1.608 0.109058    
failures          2.616e-05  7.465e-02   0.000 0.999721    
schoolsup        -1.563e-01  1.523e-01  -1.026 0.305609    
famsup           -1.549e-02  1.069e-01  -0.145 0.884877    
paid              1.886e-01  1.057e-01   1.784 0.075578 .  
activities       -2.104e-01  9.900e-02  -2.125 0.034478 *  
nursery          -2.773e-01  1.221e-01  -2.271 0.023906 *  
higher            9.098e-02  2.474e-01   0.368 0.713402    
internet          4.596e-02  1.395e-01   0.329 0.742087    
romantic         -1.356e-01  1.073e-01  -1.263 0.207541    
famrel           -2.024e-01  5.247e-02  -3.858 0.000142 ***
freetime          6.878e-02  5.369e-02   1.281 0.201243    
goout             3.368e-01  4.647e-02   7.248 4.19e-12 ***
health            6.786e-02  3.625e-02   1.872 0.062276 .  
absences          9.089e-03  6.263e-03   1.451 0.147860    
Mjobat_home      -4.918e-02  2.409e-01  -0.204 0.838389    
Mjobhealth       -1.625e-01  2.114e-01  -0.769 0.442661    
Mjobother         4.914e-03  1.876e-01   0.026 0.979118    
Mjobservices     -1.105e-01  1.729e-01  -0.639 0.523327    
Mjobteacher              NA         NA      NA       NA    
Fjobat_home       2.854e-02  3.249e-01   0.088 0.930063    
Fjobhealth        1.710e-01  2.960e-01   0.578 0.564066    
Fjobother         1.854e-01  2.118e-01   0.876 0.381918    
Fjobservices      4.603e-01  2.175e-01   2.116 0.035209 *  
Fjobteacher              NA         NA      NA       NA    
reasoncourse     -3.971e-02  1.275e-01  -0.312 0.755613    
reasonhome        5.426e-02  1.327e-01   0.409 0.682887    
reasonother       1.970e-01  1.960e-01   1.005 0.315694    
reasonreputation         NA         NA      NA       NA    
guardianfather    1.541e-01  2.236e-01   0.689 0.491250    
guardianmother    3.250e-02  2.086e-01   0.156 0.876331    
guardianother            NA         NA      NA       NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8229 on 278 degrees of freedom
Multiple R-squared:  0.3871,    Adjusted R-squared:  0.3055 
F-statistic: 4.745 on 37 and 278 DF,  p-value: 1.385e-14
# Removing redudant variables for 1-hot encoding 
Mat_alc <- Mat_alc %>% select(-guardianother, -reasonreputation, -Fjobteacher, -Mjobteacher)
set.seed(123) 
split = sample.split(Mat_alc$Avg_alc, SplitRatio = 0.8)
train_data = subset(Mat_alc, split == TRUE)
test_data = subset(Mat_alc, split == FALSE)


model <- lm(Avg_alc ~ ., data = train_data)   

predictions = predict(model, newdata = test_data)
mse = mean((predictions - test_data$Avg_alc)^2)  

print(mse)
[1] 0.7233548
print(summary(model))

Call:
lm(formula = Avg_alc ~ ., data = train_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.07721 -0.52527 -0.05375  0.41627  2.98475 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)     6.322e-01  1.162e+00   0.544 0.586784    
school         -1.641e-01  1.930e-01  -0.850 0.396053    
sex             3.533e-01  1.088e-01   3.248 0.001303 ** 
age             1.663e-02  5.030e-02   0.331 0.741126    
address         3.879e-01  1.299e-01   2.986 0.003076 ** 
famsize         1.463e-01  1.088e-01   1.345 0.179763    
Pstatus        -1.937e-01  1.590e-01  -1.218 0.224242    
Medu           -4.710e-02  7.168e-02  -0.657 0.511704    
Fedu            6.931e-02  6.231e-02   1.112 0.266903    
traveltime      1.332e-01  7.649e-02   1.741 0.082738 .  
studytime      -1.043e-01  6.487e-02  -1.608 0.109058    
failures        2.616e-05  7.465e-02   0.000 0.999721    
schoolsup      -1.563e-01  1.523e-01  -1.026 0.305609    
famsup         -1.549e-02  1.069e-01  -0.145 0.884877    
paid            1.886e-01  1.057e-01   1.784 0.075578 .  
activities     -2.104e-01  9.900e-02  -2.125 0.034478 *  
nursery        -2.773e-01  1.221e-01  -2.271 0.023906 *  
higher          9.098e-02  2.474e-01   0.368 0.713402    
internet        4.596e-02  1.395e-01   0.329 0.742087    
romantic       -1.356e-01  1.073e-01  -1.263 0.207541    
famrel         -2.024e-01  5.247e-02  -3.858 0.000142 ***
freetime        6.878e-02  5.369e-02   1.281 0.201243    
goout           3.368e-01  4.647e-02   7.248 4.19e-12 ***
health          6.786e-02  3.625e-02   1.872 0.062276 .  
absences        9.089e-03  6.263e-03   1.451 0.147860    
Mjobat_home    -4.918e-02  2.409e-01  -0.204 0.838389    
Mjobhealth     -1.625e-01  2.114e-01  -0.769 0.442661    
Mjobother       4.914e-03  1.876e-01   0.026 0.979118    
Mjobservices   -1.105e-01  1.729e-01  -0.639 0.523327    
Fjobat_home     2.854e-02  3.249e-01   0.088 0.930063    
Fjobhealth      1.710e-01  2.960e-01   0.578 0.564066    
Fjobother       1.854e-01  2.118e-01   0.876 0.381918    
Fjobservices    4.603e-01  2.175e-01   2.116 0.035209 *  
reasoncourse   -3.971e-02  1.275e-01  -0.312 0.755613    
reasonhome      5.426e-02  1.327e-01   0.409 0.682887    
reasonother     1.970e-01  1.960e-01   1.005 0.315694    
guardianfather  1.541e-01  2.236e-01   0.689 0.491250    
guardianmother  3.250e-02  2.086e-01   0.156 0.876331    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8229 on 278 degrees of freedom
Multiple R-squared:  0.3871,    Adjusted R-squared:  0.3055 
F-statistic: 4.745 on 37 and 278 DF,  p-value: 1.385e-14

vif_model <- vif(model)  
print(vif_model)
        school            sex            age        address        famsize 
      1.538024       1.375623       1.810255       1.385124       1.161434 
       Pstatus           Medu           Fedu     traveltime      studytime 
      1.161829       2.862306       2.195068       1.312992       1.357516 
      failures      schoolsup         famsup           paid     activities 
      1.454386       1.221372       1.255041       1.296218       1.142498 
       nursery         higher       internet       romantic         famrel 
      1.161971       1.291811       1.248884       1.181200       1.101477 
      freetime          goout         health       absences    Mjobat_home 
      1.309819       1.247316       1.177664       1.270030       3.183240 
    Mjobhealth      Mjobother   Mjobservices    Fjobat_home     Fjobhealth 
      1.791567       3.799501       2.702002       1.799030       1.848736 
     Fjobother   Fjobservices   reasoncourse     reasonhome    reasonother 
      5.155180       4.496349       1.755050       1.627323       1.400665 
guardianfather guardianmother 
      4.182718       4.393150 
vif_model[vif_model >= 5]
Fjobother 
  5.15518 

Dropping Fjobother because the VIF value > 5 Generally, * 1 = not correlated. * Between 1 and 5 = moderately correlated. * Greater than 5 = highly correlated.

vif_model[vif_model > 3 & vif_model < 5]
   Mjobat_home      Mjobother   Fjobservices guardianfather guardianmother 
      3.183240       3.799501       4.496349       4.182718       4.393150 
vif_model[vif_model <=1]
named numeric(0)
Mat_alc <- Mat_alc %>% select(-Fjobother)
set.seed(123) 
split = sample.split(Mat_alc$Avg_alc, SplitRatio = 0.8)
train_data = subset(Mat_alc, split == TRUE)
test_data = subset(Mat_alc, split == FALSE)


model <- lm(Avg_alc ~ ., data = train_data)   

predictions = predict(model, newdata = test_data)
mse = mean((predictions - test_data$Avg_alc)^2)  

print(mse)
[1] 0.740984
print(summary(model))

Call:
lm(formula = Avg_alc ~ ., data = train_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.05284 -0.51987 -0.07558  0.40831  2.99398 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.842181   1.136370   0.741 0.459247    
school         -0.156950   0.192762  -0.814 0.416215    
sex             0.353346   0.108724   3.250 0.001296 ** 
age             0.015411   0.050257   0.307 0.759343    
address         0.392068   0.129753   3.022 0.002747 ** 
famsize         0.148944   0.108718   1.370 0.171790    
Pstatus        -0.197511   0.158871  -1.243 0.214832    
Medu           -0.046960   0.071652  -0.655 0.512752    
Fedu            0.055838   0.060352   0.925 0.355656    
traveltime      0.129915   0.076367   1.701 0.090023 .  
studytime      -0.104700   0.064842  -1.615 0.107503    
failures       -0.003079   0.074530  -0.041 0.967081    
schoolsup      -0.169465   0.151445  -1.119 0.264111    
famsup         -0.012351   0.106756  -0.116 0.907979    
paid            0.195821   0.105348   1.859 0.064107 .  
activities     -0.203463   0.098642  -2.063 0.040071 *  
nursery        -0.280195   0.121990  -2.297 0.022367 *  
higher          0.082193   0.247138   0.333 0.739703    
internet        0.055174   0.139074   0.397 0.691874    
romantic       -0.143141   0.106950  -1.338 0.181858    
famrel         -0.201111   0.052425  -3.836 0.000155 ***
freetime        0.065689   0.053551   1.227 0.220985    
goout           0.340353   0.046276   7.355 2.14e-12 ***
health          0.067706   0.036236   1.868 0.062741 .  
absences        0.009262   0.006258   1.480 0.139982    
Mjobat_home    -0.026317   0.239376  -0.110 0.912537    
Mjobhealth     -0.133108   0.208617  -0.638 0.523963    
Mjobother       0.035265   0.184246   0.191 0.848350    
Mjobservices   -0.092509   0.171619  -0.539 0.590293    
Fjobat_home    -0.137286   0.263865  -0.520 0.603274    
Fjobhealth      0.026263   0.245520   0.107 0.914890    
Fjobservices    0.300141   0.117694   2.550 0.011301 *  
reasoncourse   -0.039207   0.127415  -0.308 0.758529    
reasonhome      0.056190   0.132614   0.424 0.672107    
reasonother     0.170396   0.193544   0.880 0.379402    
guardianfather  0.133871   0.222282   0.602 0.547492    
guardianmother  0.018863   0.207958   0.091 0.927793    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8226 on 279 degrees of freedom
Multiple R-squared:  0.3854,    Adjusted R-squared:  0.3061 
F-statistic:  4.86 on 36 and 279 DF,  p-value: 8.419e-15

Important Features based on the Significance codes sex, address, traveltime, paid, activities, nursery, famrel, goout, health, Fjobservices

model_summary <- summary(model)
estimates <- model_summary$coefficients[, "Estimate"]
ordered_estimates_desc <- estimates[order(-estimates)]
print(ordered_estimates_desc)
   (Intercept)        address            sex          goout   Fjobservices 
   0.842180660    0.392068180    0.353346417    0.340353023    0.300140840 
          paid    reasonother        famsize guardianfather     traveltime 
   0.195821395    0.170395744    0.148943530    0.133870884    0.129914894 
        higher         health       freetime     reasonhome           Fedu 
   0.082192675    0.067706007    0.065689148    0.056189569    0.055837905 
      internet      Mjobother     Fjobhealth guardianmother            age 
   0.055174216    0.035265094    0.026263021    0.018862618    0.015411085 
      absences       failures         famsup    Mjobat_home   reasoncourse 
   0.009261926   -0.003078618   -0.012350996   -0.026316601   -0.039207470 
          Medu   Mjobservices      studytime     Mjobhealth    Fjobat_home 
  -0.046960243   -0.092508832   -0.104700495   -0.133108429   -0.137286283 
      romantic         school      schoolsup        Pstatus         famrel 
  -0.143140525   -0.156949701   -0.169464562   -0.197511036   -0.201110639 
    activities        nursery 
  -0.203463393   -0.280194864 
library(ggplot2)

# Convert ordered estimates to a data frame
df_estimates <- data.frame(Predictor = names(ordered_estimates_desc),
                           Estimate = ordered_estimates_desc)

# Plot using ggplot2
plot <- ggplot(df_estimates, aes(x = reorder(Predictor, Estimate), y = Estimate)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  coord_flip() + 
  labs(title = "Ordered Estimates from the Model",
       x = "Predictors",
       y = "Coefficient Value") +
  theme_minimal()

plot

LS0tDQp0aXRsZTogIkFsY29ob2wgY29uc3VtcHRpb24gYnkgUG9ydHVndWVzZSBzdHVkZW50cyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KYXV0aG9yOiBTaGl2YSBLdW1hciBQZW5kZW0sIFNyaWNoYXJhbiBDaGVldGksIEthdXNoaWsgUGFydmF0aGFuZW5pDQotLS0NCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeSh0aWR5cikNCmxpYnJhcnkoY29ycnBsb3QpDQpsaWJyYXJ5KHBsb3RseSkNCmxpYnJhcnkoY2FUb29scykNCmxpYnJhcnkoY2FyKQ0KYGBgDQoNCg0KYGBge3J9DQpNYXQgPSByZWFkLmNzdignc3R1ZGVudE1hdC5jc3YnKQ0KDQpgYGANCg0KIyMjIFByZXByb2Nlc3NpbmcNCiMjIyMgQ29udmVydGluZyBOb24gbnVtZXJpYyB0byBOdW1lcmljDQoNCmBgYHtyIH0NCm5vbl9udW1lcmljX2NvbHVtbnMgPC0gbmFtZXMoTWF0KVtzYXBwbHkoTWF0LCBmdW5jdGlvbihjb2wpICFpcy5udW1lcmljKGNvbCkpXQ0KY2F0KCJUaGVzZSBhcmUgbm9uLW51bWVyaWMgY29sdW1uczoiLCBub25fbnVtZXJpY19jb2x1bW5zLCAiXG5cbiIpDQoNCm51bWVyaWNfY29sdW1ucyA8LSBuYW1lcyhNYXQpW3NhcHBseShNYXQsIGlzLm51bWVyaWMpXQ0KY2F0KCJUaGVzZSBhcmUgbnVtZXJpYyBjb2x1bW5zOiIsIG51bWVyaWNfY29sdW1ucywgIlxuIikNCg0KDQpgYGANCldlIGhhdmUgdG8gY29udmVydCBOb24gbnVtZXJpYyBkYXRhIHRvIG51bWVyaWMgZGF0YSB0byBmaW5kIHRoZSBjb3JyZWxhdGlvbiBjb2VmZg0KDQpgYGB7cn0NCiMgTWF0JHNleCA8LSBpZmVsc2UoTWF0JHNleCA9PSAnTScsIDEsIDApDQpgYGANCg0KYGBge3J9DQojIG5vbl9udW1lcmljX2NvbHVtbnMgPC0gYygnc2Nob29sJywgJ2FkZHJlc3MnLCAnZmFtc2l6ZScsICdQc3RhdHVzJywgJ01qb2InLCAnRmpvYicsICdyZWFzb24nLCAnZ3VhcmRpYW4nLCAnc2Nob29sc3VwJywgJ2ZhbXN1cCcsICdwYWlkJywgJ2FjdGl2aXRpZXMnLCAnbnVyc2VyeScsICdoaWdoZXInLCAnaW50ZXJuZXQnLCAncm9tYW50aWMnKQ0KDQojIGZvcihjb2wgaW4gbm9uX251bWVyaWNfY29sdW1ucykgew0KIyAgIGlmKGxlbmd0aCh1bmlxdWUoTWF0W1tjb2xdXSkpID09IDIpIHsNCiMgICAgICMgTGFiZWwgRW5jb2RpbmcNCiMgICAgIGxldmVscyA8LSB1bmlxdWUoTWF0W1tjb2xdXSkNCiMgICAgIE1hdFtbY29sXV0gPC0gaWZlbHNlKE1hdFtbY29sXV0gPT0gbGV2ZWxzWzFdLCAxLCAwKQ0KIyAgIH0gZWxzZSB7DQojICAgICAjIE9uZS1Ib3QgRW5jb2RpbmcNCiMgICAgIE1hdCA8LSBNYXQgJT4lDQojICAgICAgIG11dGF0ZSghIWNvbCA6PSBhcy5mYWN0b3IoISFzeW0oY29sKSkpICU+JQ0KIyAgICAgICBzcHJlYWQoISFjb2wsICEhY29sLCBmaWxsID0gMCwgY29udmVydCA9IFRSVUUpDQojICAgfQ0KIyB9DQpub25fbnVtZXJpYyA8LSBuYW1lcyhNYXQpW3NhcHBseShNYXQsIGZ1bmN0aW9uKGNvbCkgIWlzLm51bWVyaWMoY29sKSldDQoNCmZvciAoY29sIGluIG5vbl9udW1lcmljKXsNCiAgcHJpbnQocGFzdGUoIlRoZSBjb2x1bW4gIiwgY29sLCAiaGFzICIsIGxlbmd0aCh1bmlxdWUoTWF0W1tjb2xdXSkpLCAnIHVuaXF1ZSB2YWx1ZXMgdGhleSBhcmU6ICcpKQ0KICBwcmludCgodW5pcXVlKE1hdFtbY29sXV0pKSkNCiAgY2F0KCdcbicpDQp9DQpgYGANCg0KYGBge3IgQXJyYW5naW5nIEJpbmFyeSBhbmQgbm9uIGJpbmFyeSBjb2x1bW5zfQ0KYmluYXJ5X2NvbHVtbnMgPC0gYygpDQptdWx0aV91bmlxdWVfY29sdW1ucyA8LSBjKCkNCg0KIyBMb29wIHRocm91Z2ggZWFjaCBjb2x1bW4gYW5kIGNhdGVnb3JpemUgYmFzZWQgb24gdGhlIG51bWJlciBvZiB1bmlxdWUgdmFsdWVzDQpmb3IgKGNvbCBpbiBub25fbnVtZXJpYykgew0KICAgIG51bV91bmlxdWUgPC0gbGVuZ3RoKHVuaXF1ZShNYXRbW2NvbF1dKSkNCiAgICANCiAgICBpZiAobnVtX3VuaXF1ZSA9PSAyKSB7DQogICAgICAgIGJpbmFyeV9jb2x1bW5zIDwtIGMoYmluYXJ5X2NvbHVtbnMsIGNvbCkNCiAgICB9IGVsc2UgaWYgKG51bV91bmlxdWUgPiAyKSB7DQogICAgICAgIG11bHRpX3VuaXF1ZV9jb2x1bW5zIDwtIGMobXVsdGlfdW5pcXVlX2NvbHVtbnMsIGNvbCkNCiAgICB9DQp9DQojIGZvciAoY29sIGluIGJpbmFyeV9jb2x1bW5zKSB7DQojICAgdW5pcXVlX3ZhbHMgPC0gdW5pcXVlKE1hdFtbY29sXV0pDQojICAgTWF0W1tjb2xdXSA8LSBpZmVsc2UoTWF0W1tjb2xdXSA9PSB1bmlxdWVfdmFsc1sxXSwgMCwgMSkNCiMgfQ0KcHJpbnQocGFzdGUoIkJpbmFyeSBjb2x1bW5zOiIsIHBhc3RlKGJpbmFyeV9jb2x1bW5zLCBjb2xsYXBzZSA9ICIsICIpKSkNCmNhdCgnXG4nKQ0KcHJpbnQocGFzdGUoIk11bHRpIHVuaXF1ZSB2YWx1ZSBjb2x1bW5zOiIsIHBhc3RlKG11bHRpX3VuaXF1ZV9jb2x1bW5zLCBjb2xsYXBzZSA9ICIsICIpKSkNCmBgYA0KDQoNCmBgYHtyIFR1cmluZyBiaW5hcnkgY29sdW1ucyBudW1lcmljfQ0KDQpmb3IgKGNvbCBpbiBiaW5hcnlfY29sdW1ucykgew0KICBpZiAoInllcyIgJWluJSBNYXRbW2NvbF1dICYmICJubyIgJWluJSBNYXRbW2NvbF1dKSB7DQogICAgIyBJZiB0aGUgY29sdW1uIGhhcyAneWVzJyBhbmQgJ25vJyB2YWx1ZXMNCiAgICBNYXRbW2NvbF1dIDwtIGlmZWxzZShNYXRbW2NvbF1dID09ICJ5ZXMiLCAxLCAwKQ0KICB9IGVsc2Ugew0KICAgICMgRm9yIG90aGVyIGJpbmFyeSBjb2x1bW5zDQogICAgdW5pcXVlX3ZhbHMgPC0gdW5pcXVlKE1hdFtbY29sXV0pDQogICAgTWF0W1tjb2xdXSA8LSBpZmVsc2UoTWF0W1tjb2xdXSA9PSB1bmlxdWVfdmFsc1sxXSwgMCwgMSkNCiAgICBwcmludChwYXN0ZSh1bmlxdWVfdmFsc1sxXSwnOjAnLCB1bmlxdWVfdmFsc1syXSwnOjEnKSkNCiAgfQ0KfQ0KDQpgYGANCmBgYHtyIFR1cmluZyBtdWx0aXZhbHVlZCBjb2x1bW5zIG51bWVyaWN9DQojIFBlcmZvcm0gb25lLWhvdCBlbmNvZGluZyBmb3IgZWFjaCBjb2x1bW4gaW4gbXVsdGlfdW5pcXVlX2NvbHVtbnMNCmZvciAoY29sIGluIG11bHRpX3VuaXF1ZV9jb2x1bW5zKSB7DQogICMgQ3JlYXRlIGEgb25lLWhvdCBlbmNvZGVkIG1hdHJpeCBmb3IgdGhlIGNvbHVtbg0KICBmb3JtdWxhX3N0ciA8LSBwYXN0ZSgifiAwICsiLCBjb2wpDQogIG9uZV9ob3QgPC0gbW9kZWwubWF0cml4KGFzLmZvcm11bGEoZm9ybXVsYV9zdHIpLCBkYXRhID0gTWF0KQ0KICANCiAgIyBDb252ZXJ0IG1hdHJpeCB0byBkYXRhIGZyYW1lIGFuZCBzZXQgY29sdW1uIG5hbWVzDQogIG9uZV9ob3RfZGYgPC0gYXMuZGF0YS5mcmFtZShvbmVfaG90KQ0KICBjb2xuYW1lcyhvbmVfaG90X2RmKSA8LSBnc3ViKCJeLlxcLiIsIGNvbCwgY29sbmFtZXMob25lX2hvdF9kZikpDQogIA0KICAjIEJpbmQgdGhlIG5ldyBvbmUtaG90IGVuY29kZWQgY29sdW1ucyB0byB0aGUgb3JpZ2luYWwgZGF0YSBmcmFtZQ0KICBNYXQgPC0gY2JpbmQoTWF0LCBvbmVfaG90X2RmKQ0KICANCiAgIyBSZW1vdmUgdGhlIG9yaWdpbmFsIGNvbHVtbg0KICBNYXRbW2NvbF1dIDwtIE5VTEwNCn0NCg0KIyBWaWV3IHRoZSBmaXJzdCBmZXcgcm93cyBvZiB0aGUgcHJvY2Vzc2VkIGRhdGEgdG8gdmVyaWZ5DQpoZWFkKE1hdCkNCg0KYGBgDQpgYGB7cn0NCmNvcl9tYXRyaXggPC0gY29yKE1hdCwgdXNlID0gImNvbXBsZXRlLm9icyIpDQpgYGANCmBgYHtyfQ0KY29ycnBsb3QoY29yX21hdHJpeCwgbWV0aG9kID0gImNpcmNsZSIpDQpgYGANCmBgYHtyfQ0KcGxvdF9seSgNCiAgeCA9IGNvbG5hbWVzKGNvcl9tYXRyaXgpLA0KICB5ID0gcm93bmFtZXMoY29yX21hdHJpeCksDQogIHogPSBjb3JfbWF0cml4LA0KICB0eXBlID0gImhlYXRtYXAiLA0KICBjb2xvcnNjYWxlID0gIlZpcmlkaXMiDQopICU+JQ0KICBsYXlvdXQodGl0bGVfdGV4dCA9ICJDb3JyZWxhdGlvbiBNYXRyaXggSGVhdG1hcCIpDQoNCmBgYA0KYGBge3J9DQpHM19jb3JyZWxhdGlvbiA8LSBjb3JfbWF0cml4WywnRzMnXQ0KbGlicmFyeShwbG90bHkpDQoNCnBsb3RfbHkoeCA9IG5hbWVzKEczX2NvcnJlbGF0aW9uKSwgeSA9IEczX2NvcnJlbGF0aW9uLCB0eXBlID0gJ2JhcicpICU+JQ0KICBsYXlvdXQodGl0bGUgPSAiQ29ycmVsYXRpb24gb2YgRzMgd2l0aCBPdGhlciBWYXJpYWJsZXMiLCB5YXhpcyA9IGxpc3QodGl0bGUgPSAiQ29ycmVsYXRpb24gQ29lZmZpY2llbnQiKSkNCg0KYGBgDQpgYGB7cn0NCnByaW50KEczX2NvcnJlbGF0aW9uKQ0KYGBgDQpgYGB7cn0NCiMgU29ydGluZyBjb3JyZWxhdGlvbnMgaW4gZGVjcmVhc2luZyBvcmRlciB0byBnZXQgdG9wIHBvc2l0aXZlIGNvcnJlbGF0aW9ucw0KdG9wX3Bvc2l0aXZlIDwtIHNvcnQoRzNfY29ycmVsYXRpb24sIGRlY3JlYXNpbmcgPSBUUlVFKVsxOjEwXQ0KDQojIFNvcnRpbmcgY29ycmVsYXRpb25zIGluIGluY3JlYXNpbmcgb3JkZXIgdG8gZ2V0IHRvcCBuZWdhdGl2ZSBjb3JyZWxhdGlvbnMNCnRvcF9uZWdhdGl2ZSA8LSBzb3J0KEczX2NvcnJlbGF0aW9uKVsxOjEwXQ0KDQpwcmludCgiVG9wIFBvc2l0aXZlIENvcnJlbGF0aW9uczoiKQ0KcHJpbnQodG9wX3Bvc2l0aXZlKQ0KDQpwcmludCgiVG9wIE5lZ2F0aXZlIENvcnJlbGF0aW9uczoiKQ0KcHJpbnQodG9wX25lZ2F0aXZlKQ0KDQpgYGANCmBgYHtyfQ0KIyBTb3J0IEczX2NvcnJlbGF0aW9uIGluIGluY3JlYXNpbmcgb3JkZXINCnNvcnRlZF9uYW1lcyA8LSBuYW1lcyhzb3J0KEczX2NvcnJlbGF0aW9uKSkNCg0KIyBDb252ZXJ0IHZhcmlhYmxlIG5hbWVzIHRvIGEgZmFjdG9yIHdpdGggbGV2ZWxzIHNwZWNpZmllZCBieSB0aGUgc29ydGVkIG9yZGVyDQpmYWN0b3JfbmFtZXMgPC0gZmFjdG9yKG5hbWVzKEczX2NvcnJlbGF0aW9uKSwgbGV2ZWxzID0gc29ydGVkX25hbWVzKQ0KDQojIFBsb3QgdXNpbmcgcGxvdGx5DQpwbG90X2x5KHggPSBmYWN0b3JfbmFtZXMsIHkgPSBHM19jb3JyZWxhdGlvbiwgdHlwZSA9ICdiYXInKSAlPiUNCiAgbGF5b3V0KHRpdGxlID0gIkNvcnJlbGF0aW9uIG9mIEczIHdpdGggT3RoZXIgVmFyaWFibGVzIGluIEluY3JlYXNpbmcgT3JkZXIiLCANCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJDb3JyZWxhdGlvbiBDb2VmZmljaWVudCIpKQ0KDQoNCmBgYA0KDQpgYGB7ciBBbGNfY29uc3VtdGlvbn0NCk1hdF9hbGMgPC0gTWF0DQoNCmBgYA0KDQpgYGB7cn0NCk1hdF9hbGMkQXZnX2FsYyA8LSAoTWF0X2FsYyREYWxjICsgTWF0X2FsYyRXYWxjKS8yDQpgYGANCg0KYGBge3J9DQpNYXRfYWxjWyxjKCJBdmdfYWxjIiwgIkRhbGMiLCAiV2FsYyIpXQ0KYGBgDQpgYGB7cn0NCk1hdF9hbGMgPC0gTWF0X2FsYyAlPiUgc2VsZWN0KC1EYWxjLCAtV2FsYykNCmBgYA0KYGBge3J9DQpNYXRfYWxjDQpgYGANCmBgYHtyfQ0KTWF0X2FsYyA8LSBNYXRfYWxjICU+JSBzZWxlY3QoLUcxLCAtRzIsLUczKQ0KYGBgDQoNCmBgYHtyfQ0KY29ycl9tYXQgPSBjb3IoTWF0X2FsYykNCmNvcnJwbG90KGNvcnJfbWF0KQ0KYGBgDQoNCmBgYHtyfQ0KQXZnX2FsY19jb3JyZWxhdGlvbiA8LSBjb3JyX21hdFssJ0F2Z19hbGMnXQ0KDQoNCnBsb3RfbHkoeCA9IG5hbWVzKEF2Z19hbGNfY29ycmVsYXRpb24pLCB5ID0gQXZnX2FsY19jb3JyZWxhdGlvbiwgdHlwZSA9ICdiYXInKSAlPiUNCiAgbGF5b3V0KHRpdGxlID0gIkNvcnJlbGF0aW9uIG9mIEF2Z19hbGMgd2l0aCBPdGhlciBWYXJpYWJsZXMiLCB5YXhpcyA9IGxpc3QodGl0bGUgPSAiQ29ycmVsYXRpb24gQ29lZmZpY2llbnQiKSkNCmBgYA0KDQpgYGB7cn0NCg0Kc29ydGVkX25hbWVzIDwtIG5hbWVzKHNvcnQoQXZnX2FsY19jb3JyZWxhdGlvbikpDQoNCg0KZmFjdG9yX25hbWVzIDwtIGZhY3RvcihuYW1lcyhBdmdfYWxjX2NvcnJlbGF0aW9uKSwgbGV2ZWxzID0gc29ydGVkX25hbWVzKQ0KDQpwbG90X2x5KHggPSBmYWN0b3JfbmFtZXMsIHkgPSBBdmdfYWxjX2NvcnJlbGF0aW9uLCB0eXBlID0gJ2JhcicpICU+JQ0KICBsYXlvdXQodGl0bGUgPSAiQ29ycmVsYXRpb24gb2YgQXZnX2FsYyB3aXRoIE90aGVyIFZhcmlhYmxlcyBpbiBJbmNyZWFzaW5nIE9yZGVyIiwgDQogICAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAiQ29ycmVsYXRpb24gQ29lZmZpY2llbnQiKSkNCg0KYGBgDQoNCmBgYHtyfQ0KQXZnX2FsY19jb3JyZWxhdGlvbg0KYGBgDQoNCmBgYHtyfQ0KTWF0X2FsYw0KYGBgDQoNCmBgYHtyfQ0KIyBBc3N1bWluZyB5b3VyIGRhdGEgaXMgaW4gYSB2YXJpYWJsZSBuYW1lZCAnTWF0X2FsYycNCg0KDQoNCiMgMi4gU3BsaXQgdGhlIGRhdGEgaW50byB0cmFpbmluZyBhbmQgdGVzdGluZyBzZXRzDQpzZXQuc2VlZCgxMjMpICMgU2V0dGluZyBzZWVkIGZvciByZXByb2R1Y2liaWxpdHkNCnNwbGl0ID0gc2FtcGxlLnNwbGl0KE1hdF9hbGMkQXZnX2FsYywgU3BsaXRSYXRpbyA9IDAuOCkNCnRyYWluX2RhdGEgPSBzdWJzZXQoTWF0X2FsYywgc3BsaXQgPT0gVFJVRSkNCnRlc3RfZGF0YSA9IHN1YnNldChNYXRfYWxjLCBzcGxpdCA9PSBGQUxTRSkNCg0KIyAzLiBUcmFpbiBhIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsIHVzaW5nIHRoZSB0cmFpbmluZyBzZXQNCm1vZGVsIDwtIGxtKEF2Z19hbGMgfiAuLCBkYXRhID0gdHJhaW5fZGF0YSkgICMgVGhlIGRvdCBtZWFucyB3ZSBhcmUgdXNpbmcgYWxsIG90aGVyIGNvbHVtbnMgYXMgcHJlZGljdG9ycw0KDQojIDQuIEV2YWx1YXRlIHRoZSBtb2RlbCB1c2luZyB0aGUgdGVzdGluZyBzZXQNCnByZWRpY3Rpb25zID0gcHJlZGljdChtb2RlbCwgbmV3ZGF0YSA9IHRlc3RfZGF0YSkNCm1zZSA9IG1lYW4oKHByZWRpY3Rpb25zIC0gdGVzdF9kYXRhJEF2Z19hbGMpXjIpICAjIE1lYW4gU3F1YXJlZCBFcnJvcg0KDQpwcmludChtc2UpDQoNCiMgQWRkaXRpb25hbGx5LCB5b3UgY2FuIHByaW50IGEgc3VtbWFyeSBvZiB0aGUgbW9kZWwgdG8gaW5zcGVjdCBjb2VmZmljaWVudHMgYW5kIG90aGVyIHN0YXRpc3RpY3MNCnByaW50KHN1bW1hcnkobW9kZWwpKQ0KDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBSZW1vdmluZyByZWR1ZGFudCB2YXJpYWJsZXMgZm9yIDEtaG90IGVuY29kaW5nIA0KTWF0X2FsYyA8LSBNYXRfYWxjICU+JSBzZWxlY3QoLWd1YXJkaWFub3RoZXIsIC1yZWFzb25yZXB1dGF0aW9uLCAtRmpvYnRlYWNoZXIsIC1Nam9idGVhY2hlcikNCmBgYA0KDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzKSANCnNwbGl0ID0gc2FtcGxlLnNwbGl0KE1hdF9hbGMkQXZnX2FsYywgU3BsaXRSYXRpbyA9IDAuOCkNCnRyYWluX2RhdGEgPSBzdWJzZXQoTWF0X2FsYywgc3BsaXQgPT0gVFJVRSkNCnRlc3RfZGF0YSA9IHN1YnNldChNYXRfYWxjLCBzcGxpdCA9PSBGQUxTRSkNCg0KDQptb2RlbCA8LSBsbShBdmdfYWxjIH4gLiwgZGF0YSA9IHRyYWluX2RhdGEpICAgDQoNCnByZWRpY3Rpb25zID0gcHJlZGljdChtb2RlbCwgbmV3ZGF0YSA9IHRlc3RfZGF0YSkNCm1zZSA9IG1lYW4oKHByZWRpY3Rpb25zIC0gdGVzdF9kYXRhJEF2Z19hbGMpXjIpICANCg0KcHJpbnQobXNlKQ0KDQoNCnByaW50KHN1bW1hcnkobW9kZWwpKQ0KYGBgDQpgYGB7cn0NCg0KdmlmX21vZGVsIDwtIHZpZihtb2RlbCkgIA0KcHJpbnQodmlmX21vZGVsKQ0KYGBgDQpgYGB7cn0NCnZpZl9tb2RlbFt2aWZfbW9kZWwgPj0gNV0NCmBgYA0KRHJvcHBpbmcgRmpvYm90aGVyIGJlY2F1c2UgdGhlIFZJRiB2YWx1ZSA+IDUNCkdlbmVyYWxseSwgDQoqIDEgPSBub3QgY29ycmVsYXRlZC4NCiogQmV0d2VlbiAxIGFuZCA1ID0gbW9kZXJhdGVseSBjb3JyZWxhdGVkLg0KKiBHcmVhdGVyIHRoYW4gNSA9IGhpZ2hseSBjb3JyZWxhdGVkLg0KDQpgYGB7cn0NCnZpZl9tb2RlbFt2aWZfbW9kZWwgPiAzICYgdmlmX21vZGVsIDwgNV0NCmBgYA0KYGBge3J9DQp2aWZfbW9kZWxbdmlmX21vZGVsIDw9MV0NCmBgYA0KDQoNCg0KYGBge3J9DQpNYXRfYWxjIDwtIE1hdF9hbGMgJT4lIHNlbGVjdCgtRmpvYm90aGVyKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzKSANCnNwbGl0ID0gc2FtcGxlLnNwbGl0KE1hdF9hbGMkQXZnX2FsYywgU3BsaXRSYXRpbyA9IDAuOCkNCnRyYWluX2RhdGEgPSBzdWJzZXQoTWF0X2FsYywgc3BsaXQgPT0gVFJVRSkNCnRlc3RfZGF0YSA9IHN1YnNldChNYXRfYWxjLCBzcGxpdCA9PSBGQUxTRSkNCg0KDQptb2RlbCA8LSBsbShBdmdfYWxjIH4gLiwgZGF0YSA9IHRyYWluX2RhdGEpICAgDQoNCnByZWRpY3Rpb25zID0gcHJlZGljdChtb2RlbCwgbmV3ZGF0YSA9IHRlc3RfZGF0YSkNCm1zZSA9IG1lYW4oKHByZWRpY3Rpb25zIC0gdGVzdF9kYXRhJEF2Z19hbGMpXjIpICANCg0KcHJpbnQobXNlKQ0KDQoNCnByaW50KHN1bW1hcnkobW9kZWwpKQ0KYGBgDQoNCkltcG9ydGFudCBGZWF0dXJlcyBiYXNlZCBvbiB0aGUgU2lnbmlmaWNhbmNlIGNvZGVzDQpzZXgsIGFkZHJlc3MsIHRyYXZlbHRpbWUsIHBhaWQsIGFjdGl2aXRpZXMsIG51cnNlcnksIGZhbXJlbCwgZ29vdXQsIGhlYWx0aCwgRmpvYnNlcnZpY2VzDQoNCmBgYHtyfQ0KbW9kZWxfc3VtbWFyeSA8LSBzdW1tYXJ5KG1vZGVsKQ0KZXN0aW1hdGVzIDwtIG1vZGVsX3N1bW1hcnkkY29lZmZpY2llbnRzWywgIkVzdGltYXRlIl0NCm9yZGVyZWRfZXN0aW1hdGVzX2Rlc2MgPC0gZXN0aW1hdGVzW29yZGVyKC1lc3RpbWF0ZXMpXQ0KcHJpbnQob3JkZXJlZF9lc3RpbWF0ZXNfZGVzYykNCg0KYGBgDQoNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQoNCiMgQ29udmVydCBvcmRlcmVkIGVzdGltYXRlcyB0byBhIGRhdGEgZnJhbWUNCmRmX2VzdGltYXRlcyA8LSBkYXRhLmZyYW1lKFByZWRpY3RvciA9IG5hbWVzKG9yZGVyZWRfZXN0aW1hdGVzX2Rlc2MpLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgRXN0aW1hdGUgPSBvcmRlcmVkX2VzdGltYXRlc19kZXNjKQ0KDQojIFBsb3QgdXNpbmcgZ2dwbG90Mg0KcGxvdCA8LSBnZ3Bsb3QoZGZfZXN0aW1hdGVzLCBhZXMoeCA9IHJlb3JkZXIoUHJlZGljdG9yLCBFc3RpbWF0ZSksIHkgPSBFc3RpbWF0ZSkpICsNCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSAibGlnaHRibHVlIikgKw0KICBjb29yZF9mbGlwKCkgKyANCiAgbGFicyh0aXRsZSA9ICJPcmRlcmVkIEVzdGltYXRlcyBmcm9tIHRoZSBNb2RlbCIsDQogICAgICAgeCA9ICJQcmVkaWN0b3JzIiwNCiAgICAgICB5ID0gIkNvZWZmaWNpZW50IFZhbHVlIikgKw0KICB0aGVtZV9taW5pbWFsKCkNCg0KcGxvdA0KDQpgYGANCg0K